#str(df)
# clean dataframe to keep useful columns
tidy_df <- df %>%
dplyr::select(year, cause_name, state, deaths, age_adjusted_death_rate) %>%
mutate(cause = as.factor(cause_name),
state = as.factor(state),
rate = age_adjusted_death_rate/100) %>% #rate is in decimal value
dplyr::select(year, state, cause, deaths, rate) %>%
# filter out "all causes"
filter(cause != "All causes") %>%
# replace "CLRD" with its whole name
mutate(cause = replace(as.character(cause),
cause == "CLRD",
"Chronic lower respiratory diseases"))
head(tidy_df)
## # A tibble: 6 x 5
## year state cause deaths rate
## <int> <fct> <chr> <int> <dbl>
## 1 2016 Alabama Unintentional injuries 2755 0.555
## 2 2016 Alaska Unintentional injuries 439 0.631
## 3 2016 Arizona Unintentional injuries 4010 0.542
## 4 2016 Arkansas Unintentional injuries 1604 0.518
## 5 2016 California Unintentional injuries 13213 0.32
## 6 2016 Colorado Unintentional injuries 2880 0.512
skimr::skim(tidy_df)
## Skim summary statistics
## n obs: 9360
## n variables: 5
##
## ── Variable type:character ───────────────────────────────────────────────────────────────
## variable missing complete n min max empty n_unique
## cause 0 9360 9360 6 34 0 10
##
## ── Variable type:factor ──────────────────────────────────────────────────────────────────
## variable missing complete n n_unique
## state 0 9360 9360 52
## top_counts ordered
## Ala: 180, Ala: 180, Ari: 180, Ark: 180 FALSE
##
## ── Variable type:integer ─────────────────────────────────────────────────────────────────
## variable missing complete n mean sd p0 p25 p50 p75
## deaths 0 9360 9360 7307.49 39644.23 21 547 1440.5 3431.5
## year 0 9360 9360 2007.5 5.19 1999 2003 2007.5 2012
## p100 hist
## 725192 ▇▁▁▁▁▁▁▁
## 2016 ▇▅▅▅▅▅▅▇
##
## ── Variable type:numeric ─────────────────────────────────────────────────────────────────
## variable missing complete n mean sd p0 p25 p50 p75 p100
## rate 0 9360 9360 0.61 0.68 0.026 0.18 0.31 0.55 3.47
## hist
## ▇▂▁▁▁▁▁▁
The first plot is intended to deliver a message to general population and is trying to answer what leading death causes are across years.
plot1_v1 <- tidy_df %>%
# calculate yearly total deaths from all states
group_by(year, cause) %>%
summarise(deaths_by_year_cause = sum(deaths)) %>%
# plot
ggplot(aes(x = fct_reorder(cause, deaths_by_year_cause),
y = deaths_by_year_cause/1000000)) +
geom_col(fill = "steelblue",
alpha = 0.7) +
scale_y_continuous(expand = c(0, 0)) +
coord_flip() +
facet_wrap(~year, ncol = 3) +
theme_minimal(base_size = 20) +
theme(panel.grid.minor = element_blank(),
plot.title = element_text(face = "bold")) +
labs(y = "No. of deaths (in millions)",
x = "Causes of deaths",
title = "Heart disease is a leading cause of death over years",
caption = "Source: Centers for Disease Control and Prevention")
plot1_v1
Too many facets. A total of 18 facets make it hard to compare causes of death across years.
plot1_v2 <- tidy_df %>%
group_by(year, cause) %>%
summarise(deaths_by_year_cause = sum(deaths)) %>%
# plot
ggplot(aes(x = fct_reorder(cause, deaths_by_year_cause),
y = deaths_by_year_cause/1000000)) +
geom_col(fill = "steelblue",
alpha = 0.7) +
scale_y_continuous(expand = c(0, 0)) +
coord_flip() +
theme_minimal(base_size = 15) +
theme(panel.grid.minor = element_blank(),
plot.margin = margin(1, 0.5, 1, 0.5, "cm")) +
transition_time(year) +
labs(title = "Top 10 Causes of Deaths",
subtitle = "Year: {round(frame_time)}",
caption = "Source: Centers for Disease Control and Prevention",
y = "No. of deaths (in millions)",
x = "Causes of deaths")
animate(plot1_v2, duration = 25,
nframes = 100,
end_pause = 10,
renderer = gifski_renderer(width = 1000))
Rank of causes changes. fct_reorder cannot reflect an accurate rank of causes on y-axis.
plot1_v3 <- tidy_df %>%
group_by(year, cause) %>%
summarise(deaths_by_year_cause = sum(deaths)) %>%
# creat rank per year
group_by(year) %>%
arrange(desc(deaths_by_year_cause)) %>%
mutate(rank = row_number()) %>%
arrange(year) %>%
ungroup() %>%
#plot
ggplot(aes(x = '{frame_time}', y = rank)) +
geom_text(aes(label = cause,
color = cause,
group = cause),
size = 8) +
scale_x_discrete(labels = NULL) +
scale_y_reverse() +
scale_color_paletteer_d(rcartocolor, Vivid) +
transition_states(year,
transition_length = 1,
state_length = 3) +
ease_aes('sine-in-out') +
theme_void(base_size = 28) +
theme(plot.title = element_text(hjust = 0.5)) +
labs(title = "Rank of causes of death: {closest_state}") +
guides(color = "none")
animate(plot1_v3, duration = 30, nframes = 300,
renderer = gifski_renderer(width = 1200))
I am trying to combine the feasures of version 1 and version 2 to plot bra graph animation with changes in y axis accroding to the rank of cuases.
The second plot is for policy-maker or health-related research and is a quick summary plot regarding changes in death causes over years.
# create a new df so annotation can be added
df_plot2 <- tidy_df %>%
group_by(year, cause) %>%
summarise(deaths_by_year_cause = sum(deaths))
df_plot2 %>%
ggplot(aes(x = year, y = deaths_by_year_cause/1000000, color = cause)) +
geom_line(size = 2) +
scale_x_continuous(breaks = seq(1999, 2016, by = 2),
expand = c(0, 0)) +
scale_y_log10(expand = c(0, 0),
breaks = c(0.1, 0.25, 0.5, 0.75, 1.0, 1.25, 1.5)) +
scale_color_paletteer_d(rcartocolor, Vivid) +
theme_minimal(base_size = 30) +
theme(panel.grid.minor = element_blank()) +
geom_text(data = filter(df_plot2, year == 2016),
aes(label = cause),
nudge_x = 3,
hjust = 1.5,
size = 8) +
guides(color = "none")
Annotations of cause seems really hard to be ajusted/organized in a neat way. I failed to get a perfect annotation by ajusting fig.height, fig.width, or nudge_x.
tidy_df %>%
group_by(year, cause) %>%
summarise(deaths_by_year_cause = sum(deaths)) %>%
mutate(cause = factor(cause,
levels = c("Heart disease", "Cancer", "Unintentional injuries",
"Chronic lower respiratory diseases", "Stroke", "Alzheimer's disease", "Diabetes", "Influenza and pneumonia", "Kidney disease" , "Suicide"
))) %>%
ggplot(aes(x = year, y = deaths_by_year_cause/1000000, color = cause)) +
geom_line(size = 2) +
scale_x_continuous(breaks = seq(1999, 2016, by = 2),
expand = c(0, 0)) +
scale_y_log10(expand = c(0, 0),
breaks = c(0.1, 0.25, 0.5, 0.75, 1.0, 1.25, 1.5)) +
scale_color_paletteer_d(rcartocolor, Vivid) +
theme_minimal(base_size = 30) +
theme(panel.grid.minor = element_blank(),
legend.key.size = unit(3, 'lines'),
plot.title = element_text(face = "bold"),
plot.subtitle = element_text(face = "italic")) +
labs(title = "Caues of Death Across Years",
subtitle = "Heart disease and cancer are top 2 causes of deaths.\nAlzheimer's disease increases rapdly.",
x = "Year",
y = "No. of deaths (in millions)",
caption = "Source: Centers for Disease Control and Prevention ",
color = "")
The last plot is for both parties and is for displaying a distinct death causes in each state.
usa <- as_tibble(map_data("state"))
usa$region <- str_to_title(usa$region)
usa <- usa %>%
rename(state = region)
distinct_by_state <- tidy_df %>%
# creat rank per year and state
group_by(year, state) %>%
arrange(desc(deaths)) %>%
mutate(rank = row_number()) %>%
arrange(year) %>%
ungroup() %>%
# only select rank no. 1
filter(rank == 1,
state != "United States")
distinct_map <- full_join(usa, distinct_by_state, by = "state") %>%
filter(!is.na(lat), !is.na(long), !is.na(year))
## Warning: Column `state` joining character vector and factor, coercing into
## character vector
# map data with USA states
plot3 <- distinct_map %>%
mutate(cause = factor(cause, levels = c("Heart disease", "Cancer"))) %>%
ggplot(aes(long, lat, group = group, fill = cause))+
geom_polygon(color = "white")+
coord_map()+
labs(title = "A Distinct Cause of Death",
subtitle = "Cancer emerges as a leading cause in early 20th.",
caption = "Source: Centers for Disease Control and Prevention ",
fill = "")+
theme_void(base_size = 35) +
facet_wrap(~year, ncol = 3) +
theme(plot.title = element_text(face = "bold",
hjust = 0.5,
vjust = 10),
plot.subtitle = element_text(face = "italic",
hjust = 0.5,
vjust = 10),
legend.position = "top")
plot3
I tried to animate the above graph but failed to make the transisition smmothly. Debugin now.
ani_plot3 <- distinct_map %>%
mutate(cause = factor(cause, levels = c("Heart disease", "Cancer"))) %>%
ggplot(aes(long, lat, group = group, fill = cause))+
geom_polygon(color = "white")+
coord_map()+
theme_void() +
transition_time(year) +
labs(title = "A Distinct Cause of Death\nCancer emerges as a leading cause in early 20th.",
subtitle = "Year: {round(frame_time)}",
caption = "Source: Centers for Disease Control and Prevention ",
fill = "")
animate(ani_plot3, duration = 30, renderer = gifski_renderer(width = 1200))